#--------------------------------------------------------------------------------------------------------------------#
#------------------------------------------------------ Lea Portmann ------------------------------------------------#
#-------------------------------------------------------- May 2021 --------------------------------------------------#
#--------- What Makes a Successful Candidate? Political Experience and Low-Information Cues in Elections -----------#
#-------------------------------------------------- Descriptive statistics ------------------------------------------#
#--------------------------------------------------------------------------------------------------------------------#

library(Hmisc)
library(lme4)
library(interplot)
library(dplyr)
library(knitr)
library(ri2)
library(xtable)

# Start from an empty workspace. rm() only removes objects; the packages
# attached above stay attached.
rm(list=ls())
# Placeholder path: point this at the directory that contains the data file.
setwd(".../...")
# Load the saved R data file "d3"; the code below expects it to provide a
# data frame named d3.
load("d3")

# Drop incumbent candidates: keep only rows where incumbent == 0.
# NOTE(review): with this kind of indexing, rows whose `incumbent` is NA are
# returned as all-NA rows rather than dropped -- confirm there are none.
d3_m <- d3[d3$incumbent == 0, ]

#---------------------------------------------------------------------- Figure 1: Information on ballot

# Data frame
# One summary table per ballot treatment: number of rows in each treatment
# arm and the arm's share of all rows in percent (one decimal). Both tables
# get the same column names so that they can be stacked.

d3F1_TApoloff <- d3 %>%
  dplyr::group_by(TApoloff) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::mutate(
    freq = round((n / sum(n)) * 100, digits = 1),
    inf = "Political offices on ballot"
  )
colnames(d3F1_TApoloff) <- c("Treat", "n", "freq", "information")

d3F1_TAnonelec <- d3 %>%
  dplyr::group_by(TAnonelec) %>%
  dplyr::summarise(n = n()) %>%
  dplyr::mutate(
    freq = round((n / sum(n)) * 100, digits = 1),
    inf = "Nonelective offices on ballot"
  )
colnames(d3F1_TAnonelec) <- c("Treat", "n", "freq", "information")

dF1 <- as.data.frame(rbind(d3F1_TApoloff, d3F1_TAnonelec))

# Plot
# Readable labels for the two treatment codes
dF1$Information <- factor(
  dF1$Treat,
  levels = c("T0", "T1"),
  labels = c("not on ballot", "on ballot")
)

# Helper columns; they are not used by the plot built below
dF1$freq_fig <- dF1$freq
dF1$freq_fig[dF1$Information == "not on ballot"] <- 100
dF1$Null <- 0

# Palette and line types; not referenced by the plot built below
colors_2 <- c("#d73027", "#91bfdb", "#01665e")
line_t <- c("dotted", "solid")

# Only the "on ballot" arms are drawn
dF1_r <- dplyr::filter(dF1, Information == "on ballot")

# Horizontal black bars with the percentage printed next to each bar
f1 <- ggplot(dF1_r, aes(x = information, y = freq)) +
  geom_bar(stat = "identity", fill = "black", width = 0.5) +
  theme_bw() +
  coord_flip() +
  geom_text(aes(label = paste0(freq, "%")), hjust = -0.5, color = "black") +
  labs(y = "Frequency (%)", x = "", color = "black") +
  scale_y_continuous("Proportion (%)", limits = c(0, 100)) +
  theme(
    text = element_text(size = 13),
    axis.text.x = element_text(colour = "black"),
    axis.text.y = element_text(colour = "black")
  )
f1

# --- Put variables numeric

# paroff_d and prof_c3 are converted via as.character(), so the printed
# values (not any internal codes) become the numbers.
d3_m$paroff_d <- as.numeric(as.character(d3_m$paroff_d))
d3_m$prof_c3 <- as.numeric(as.character(d3_m$prof_c3))

# poloff_d is converted directly and then recoded: 1 -> 0, anything else -> 1.
# NOTE(review): presumably poloff_d is a two-level factor, so the direct
# conversion yields the level codes 1/2 -- confirm.
d3_m$poloff_d <- ifelse(as.numeric(d3_m$poloff_d) == 1, 0, 1)

# Most frequent value of a vector.
#
# x: an atomic vector.
# Returns the value of x that occurs most often. If several values share the
# highest count, the one that appears first in x is returned.
Mode <- function(x) {
  ux <- unique(x)
  # match() maps every element to its position in ux; tabulate() counts how
  # often each position occurs; which.max() picks the first maximum.
  ux[which.max(tabulate(match(x, ux)))]
}

# --- Get means with and without treatment
# One row per treatment arm, with the mean / share of every covariate that
# goes into the balance table. The exported dplyr verbs are called with `::`
# (`:::` is meant for non-exported objects only).

d_mean_poloff <- d3_m %>%
  dplyr::group_by(TApoloff) %>%
  dplyr::summarise(variable = "Political offices",
                   mean_poloff = mean(poloff_d),
                   mean_nonelec = mean(nonelec_d),
                   mean_age = mean(age),
                   perc_nonswissname = sum(nonswissname) / n(),
                   perc_sex = sum(sex_f) / n(),
                   mean_prof = mean(prof_c3),
                   perc_precumulated = sum(precumulated) / n(),
                   mean_relpos = mean(relpos))

# NOTE(review): this table is labelled "Party office" while the matching
# p-value column further down is labelled "Nonelective offices" -- confirm
# which label the final table should carry.
d_mean_nonelec <- d3_m %>%
  dplyr::group_by(TAnonelec) %>%
  dplyr::summarise(variable = "Party office",
                   mean_poloff = mean(poloff_d),
                   mean_nonelec = mean(nonelec_d),
                   mean_age = mean(age),
                   perc_nonswissname = sum(nonswissname) / n(),
                   perc_sex = sum(sex_f) / n(),
                   mean_prof = mean(prof_c3),
                   perc_precumulated = sum(precumulated) / n(),
                   mean_relpos = mean(relpos))

# Both tables share the same display names
mean_table_names <- c("Treatment", "Variable",
                      "Mean political office", "Mean non-elective office",
                      "Mean age", "Percentage non-Swiss name", "Percentage Female",
                      "Mean Profession", "Percent precumulated",
                      "Mean Relative position")
colnames(d_mean_poloff) <- mean_table_names
colnames(d_mean_nonelec) <- mean_table_names

# ------- Political offices
# Balance test by randomization inference: for every covariate the observed
# F statistic of lm(Z ~ covariate) is compared with its distribution under
# re-drawn assignments.

# Assignment declaration with lists (list_id) as clusters
declaration <-
  with(d3_m, {
    declare_ra(clusters = c(list_id))
  })

declaration

d3_m$TApoloff_bal <- factor(d3_m$TApoloff,
                            levels = c("T0", "T1"),
                            labels = c(0, 1))

# Profession enters the balance regression as a categorical covariate
d3_m$prof_c3 <- factor(d3_m$prof_c3)

# Z: 0/1 treatment indicator used as the assignment variable
d3_m$Z <- as.numeric(as.character(d3_m$TApoloff_bal))

# Covariates to test, one column each
X <- d3_m %>%
  dplyr::ungroup() %>%
  dplyr::select(poloff_d, nonelec_d_eff, age,
                nonswissname, sex_f, prof_c3,
                precumulated, relpos_c)

for (i in seq_along(X)) {

  # Test statistic: F statistic of regressing the assignment on covariate i.
  # X[[i]] is taken from the X built above, not from `data`.
  balance_fun <- function(data) {
    unname(summary(lm(Z ~ X[[i]], data = data))$fstatistic[1])
  }

  out <-
    conduct_ri(
      test_function = balance_fun,
      declaration = declaration,
      assignment = "Z",
      sharp_hypothesis = 0,
      data = d3_m, sims = 1000
    )

  # Inside a loop the plot object is not auto-printed, so print it explicitly
  print(plot(out))

  # Store the result as p1, p2, ...; the third column of the summary is
  # presumably the two-tailed p-value -- confirm against the installed ri2.
  pvalue <- paste0("p", i)
  p <- summary(out)[3]
  assign(pvalue, p)
}

pvalue_TApoloff <- cbind("Pvalue", "Political offices", p1, p2, p3, p4, p5, p6, p7, p8)
colnames(pvalue_TApoloff) <- c("Treatment", "Variable",
                               "Mean political office", "Mean non-elective office",
                               "Mean age", "Percentage non-Swiss name", "Percentage Female",
                               "Mean Profession", "Percent precumulated",
                               "Mean Relative position")

# ------- Nonelective offices
# Same balance test as for the political-office treatment, with Z switched to
# the nonelective-office treatment. Reuses `declaration` and the covariate
# table `X` that are already in the workspace.

d3_m$TAnonelec_bal <- factor(d3_m$TAnonelec,
                             levels = c("T0", "T1"),
                             labels = c(0, 1))

# Z: 0/1 treatment indicator used as the assignment variable
d3_m$Z <- as.numeric(as.character(d3_m$TAnonelec_bal))

for (i in seq_along(X)) {

  # Test statistic: F statistic of regressing the assignment on covariate i.
  # X[[i]] is taken from the workspace X, not from `data`.
  balance_fun <- function(data) {
    unname(summary(lm(Z ~ X[[i]], data = data))$fstatistic[1])
  }

  out <-
    conduct_ri(
      test_function = balance_fun,
      declaration = declaration,
      assignment = "Z",
      sharp_hypothesis = 0,
      data = d3_m, sims = 1000
    )

  # Inside a loop the plot object is not auto-printed, so print it explicitly
  print(plot(out))

  # Store the result as p1, p2, ... (overwrites the values of any earlier run);
  # the third column of the summary is presumably the two-tailed p-value --
  # confirm against the installed ri2.
  pvalue <- paste0("p", i)
  p <- summary(out)[3]
  assign(pvalue, p)
}

pvalue_TAnonelec <- cbind("Pvalue", "Nonelective offices", p1, p2, p3, p4, p5, p6, p7, p8)
colnames(pvalue_TAnonelec) <- c("Treatment", "Variable",
                                "Mean political office", "Mean non-elective office",
                                "Mean age", "Percentage non-Swiss name", "Percentage Female",
                                "Mean Profession", "Percent precumulated",
                                "Mean Relative position")

# --------- Table

# Stack the two tables of group means and round the eight covariate columns
dat_t1 <- as.data.frame(rbind(d_mean_poloff, d_mean_nonelec))
covariate_cols <- 3:10
dat_t1[, covariate_cols] <- round(dat_t1[, covariate_cols], 2)

# Transpose everything so that the covariates become rows
dat_t1 <- as.data.frame(t(dat_t1))
pvalue_TApoloff <- as.data.frame(t(pvalue_TApoloff))
pvalue_TAnonelec <- as.data.frame(t(pvalue_TAnonelec))

# Put each p-value column right after the group means it belongs to
# (assumes two treatment arms per table, i.e. four mean columns -- confirm)
dat_t2 <- cbind(dat_t1, pvalue_TApoloff, pvalue_TAnonelec)
column_order <- c(1, 2, 5, 3, 4, 6)
dat_t3 <- dat_t2[, column_order]

# LaTeX version of the table
xtable(dat_t3)


# --- Exclude list where no candidate has political office or nonelective office
# NOTE(review): the filter below only looks at political offices (poloff_d);
# nonelective offices are not checked -- confirm that this is intended.

# Back to numeric so that the group means can be computed again
d3_m$prof_c3 <- as.numeric(as.character(d3_m$prof_c3))

# Number of candidates with a political office on each list. The grouping is
# removed again right away so it cannot leak into later steps.
d3_m <- d3_m %>%
  dplyr::group_by(list_id) %>%
  dplyr::mutate(sum_poloff_list = sum(poloff_d)) %>%
  dplyr::ungroup()

# Keep only lists with at least one such candidate
d3_m_red <- d3_m %>%
  dplyr::filter(sum_poloff_list != 0)

# Group means per treatment arm for the reduced sample (d3_m_red). The
# exported dplyr verbs are called with `::` (`:::` is meant for non-exported
# objects only).
d_mean_poloff <- d3_m_red %>%
  dplyr::group_by(TApoloff) %>%
  dplyr::summarise(variable = "Political offices",
                   mean_poloff = mean(poloff_d),
                   mean_nonelec = mean(nonelec_d),
                   mean_age = mean(age),
                   perc_nonswissname = sum(nonswissname) / n(),
                   perc_sex = sum(sex_f) / n(),
                   mean_prof = mean(prof_c3),
                   perc_precumulated = sum(precumulated) / n(),
                   mean_relpos = mean(relpos))

# NOTE(review): this table is labelled "Party office" while the matching
# p-value column is labelled "Nonelective offices" -- confirm which label the
# final table should carry.
d_mean_nonelec <- d3_m_red %>%
  dplyr::group_by(TAnonelec) %>%
  dplyr::summarise(variable = "Party office",
                   mean_poloff = mean(poloff_d),
                   mean_nonelec = mean(nonelec_d),
                   mean_age = mean(age),
                   perc_nonswissname = sum(nonswissname) / n(),
                   perc_sex = sum(sex_f) / n(),
                   mean_prof = mean(prof_c3),
                   perc_precumulated = sum(precumulated) / n(),
                   mean_relpos = mean(relpos))

# Both tables share the same display names
mean_table_names <- c("Treatment", "Variable",
                      "Mean political office", "Mean non-elective office",
                      "Mean age", "Percentage non-Swiss name", "Percentage Female",
                      "Mean Profession", "Percent precumulated",
                      "Mean Relative position")
colnames(d_mean_poloff) <- mean_table_names
colnames(d_mean_nonelec) <- mean_table_names

# ------- Political offices
# Balance test by randomization inference on the reduced sample: for every
# covariate the observed F statistic of lm(Z ~ covariate) is compared with
# its distribution under re-drawn assignments.

# Assignment declaration with lists (list_id) as clusters
declaration <-
  with(d3_m_red, {
    declare_ra(clusters = c(list_id))
  })

declaration

d3_m_red$TApoloff_bal <- factor(d3_m_red$TApoloff,
                                levels = c("T0", "T1"),
                                labels = c(0, 1))

# Profession enters the balance regression as a categorical covariate. The
# conversion has to be applied to d3_m_red, because that is the data set the
# covariates below are taken from.
d3_m_red$prof_c3 <- factor(d3_m_red$prof_c3)

# Z: 0/1 treatment indicator used as the assignment variable
d3_m_red$Z <- as.numeric(as.character(d3_m_red$TApoloff_bal))

# Covariates to test, one column each.
# NOTE(review): this set uses age_c3, whereas the full-sample balance test
# uses age -- confirm which one is intended.
X <- d3_m_red %>%
  dplyr::ungroup() %>%
  dplyr::select(poloff_d, nonelec_d_eff, age_c3,
                nonswissname, sex_f, prof_c3,
                precumulated, relpos_c)

for (i in seq_along(X)) {

  # Test statistic: F statistic of regressing the assignment on covariate i.
  # X[[i]] is taken from the X built above, not from `data`.
  balance_fun <- function(data) {
    unname(summary(lm(Z ~ X[[i]], data = data))$fstatistic[1])
  }

  out <-
    conduct_ri(
      test_function = balance_fun,
      declaration = declaration,
      assignment = "Z",
      sharp_hypothesis = 0,
      data = d3_m_red, sims = 1000
    )

  # Inside a loop the plot object is not auto-printed, so print it explicitly
  print(plot(out))

  # Store the result as p1, p2, ... (overwrites the values of any earlier run);
  # the third column of the summary is presumably the two-tailed p-value --
  # confirm against the installed ri2.
  pvalue <- paste0("p", i)
  p <- summary(out)[3]
  assign(pvalue, p)
}

pvalue_TApoloff <- cbind("Pvalue", "Political offices", p1, p2, p3, p4, p5, p6, p7, p8)
colnames(pvalue_TApoloff) <- c("Treatment", "Variable",
                               "Mean political office", "Mean non-elective office",
                               "Mean age", "Percentage non-Swiss name", "Percentage Female",
                               "Mean Profession", "Percent precumulated",
                               "Mean Relative position")

# ------- Nonelective offices
# Same balance test on the reduced sample, with Z switched to the
# nonelective-office treatment. Reuses `declaration` and the covariate table
# `X` that are already in the workspace.

d3_m_red$TAnonelec_bal <- factor(d3_m_red$TAnonelec,
                                 levels = c("T0", "T1"),
                                 labels = c(0, 1))

# Z: 0/1 treatment indicator used as the assignment variable
d3_m_red$Z <- as.numeric(as.character(d3_m_red$TAnonelec_bal))

for (i in seq_along(X)) {

  # Test statistic: F statistic of regressing the assignment on covariate i.
  # X[[i]] is taken from the workspace X, not from `data`.
  balance_fun <- function(data) {
    unname(summary(lm(Z ~ X[[i]], data = data))$fstatistic[1])
  }

  out <-
    conduct_ri(
      test_function = balance_fun,
      declaration = declaration,
      assignment = "Z",
      sharp_hypothesis = 0,
      data = d3_m_red, sims = 1000
    )

  # Inside a loop the plot object is not auto-printed, so print it explicitly
  print(plot(out))

  # Store the result as p1, p2, ... (overwrites the values of any earlier run);
  # the third column of the summary is presumably the two-tailed p-value --
  # confirm against the installed ri2.
  pvalue <- paste0("p", i)
  p <- summary(out)[3]
  assign(pvalue, p)
}

pvalue_TAnonelec <- cbind("Pvalue", "Nonelective offices", p1, p2, p3, p4, p5, p6, p7, p8)
colnames(pvalue_TAnonelec) <- c("Treatment", "Variable",
                                "Mean political office", "Mean non-elective office",
                                "Mean age", "Percentage non-Swiss name", "Percentage Female",
                                "Mean Profession", "Percent precumulated",
                                "Mean Relative position")

# --------- Table

# Stack the two tables of group means and round the eight covariate columns
dat_t1 <- as.data.frame(rbind(d_mean_poloff, d_mean_nonelec))
covariate_cols <- 3:10
dat_t1[, covariate_cols] <- round(dat_t1[, covariate_cols], 2)

# Transpose everything so that the covariates become rows
dat_t1 <- as.data.frame(t(dat_t1))
pvalue_TApoloff <- as.data.frame(t(pvalue_TApoloff))
pvalue_TAnonelec <- as.data.frame(t(pvalue_TAnonelec))

# Put each p-value column right after the group means it belongs to
# (assumes two treatment arms per table, i.e. four mean columns -- confirm)
dat_t2 <- cbind(dat_t1, pvalue_TApoloff, pvalue_TAnonelec)
column_order <- c(1, 2, 5, 3, 4, 6)
dat_t3 <- dat_t2[, column_order]

# LaTeX version of the table
xtable(dat_t3)

